#!/usr/bin/env Rscript

#
# usage: $ Rscript --vanilla analyze.R input.csv output.pdf [ > output.log]
#        where data.csv must contains three columns: name,population,seat (<- names are important for the first line)
# version: 1.0 for Harvard Dataverse (2023-05-25, liangz)
# citation: Tanimoto, Akiko; Zhao, Liang, 2023, "Subnational Legislatures", https://doi.org/10.7910/DVN/A3QGLL, Harvard Dataverse, V2, UNF:6:1nucS7sjomZIZjs7D72+Rw== [fileUNF] 

#
# range for plotting
#
xrange = c(4.5, 7.5)
yrange = c(1.3, 2.4)

#
# check the command line argument
#

args = commandArgs(trailingOnly=TRUE)
if (length(args) != 2 ) {
	stop("Usage: Rscript --vanilla analyze.R inpu.csv output.pdf [ > output.log]\n", call.=FALSE)
}

#
# read the data.
#

data <-read.csv(args[1])

# logrithm scale 
population <- log10(data$population)
seat <- log10(data$seat)

# normal scale 
#population <- (data$population)
#seat <- (data$seats)

#
# prepare a PDF file for plot
#

pdf(file=args[2], width=10, height=10)

#
# regression model for seats
#

model <- lm(seat ~ population)
summary(model)

#
# plot the seats data
#

plot(population, seat, xlim=xrange, ylim=yrange, type="p", pch=1, xlab="x = log10(Population)", ylab="y = log10(Seats)")
coef <- coefficients(model)
if (coef[2] > 0) {
	func <- paste("y = ", formatC(coef[1], digits=5, format="g"), " + ", formatC(coef[2], digits=5, format="g"), "x")
} else {
	func <- paste("y = ", formatC(coef[1], digits=5, format="g"), " - ", formatC(-coef[2], digits=5, format="g"), "x")
}
adj_r2 <- paste("adj. R2 = ", formatC(summary(model)$adj.r.squared, digits=2, format="g"))
mtext(text=paste("Regression of Seats (solid): ", func, "(", adj_r2, ")"), line=-3, col="black")
abline(model, col="black", lwd=2, lty="solid")

#
# draw a reference line y = 1/3 x^0.4 => log10(y) = log10(1/3) + 0.4 log10(x)
#

abline(a=log10(1/3), b=0.4, col="black", lwd=2, lty="dashed")
func <- paste("y = -0.47712 + 0.4x")
mtext(text=paste("Allometry by Zhao & Peng 2020 (dashed line): ", func), line=-2, col="black")

mtext(text=paste("datafile: ", args[1], "; date plotted: ", Sys.Date()), line=-1, col="black")

dev.off()
quit()
